*** OPEN TEST SCORE FILE 

/* Notes: download the test score files, create another excel file with two sheets
one named "grade12" and one named "grade12" and put in local disk */

********************************************************************************
*
*			GRADE 12 STUDENTS
*
********************************************************************************
clear 
import excel "C:\Users\trinh\OneDrive\Desktop\testscore.xlsx", sheet("grade12") firstrow

rename *, lower

g score_eng = tot1 if sub1=="ENG"
g score_dzongkhag = tot2 if sub2=="DZO"

g score_average = (tot1*(~missing(tot1)) + tot2*(~missing(tot2)) + tot3*(~missing(tot3)) + tot4*(~missing(tot4)) + tot5*(~missing(tot5)) + tot6*(~missing(tot6)) + tot7*(~missing(tot7)) + tot8*(~missing(tot8)) + tot9*(~missing(tot9)))/(~missing(tot1) + ~missing(tot2) + ~missing(tot3) + ~missing(tot4) + ~missing(tot5) + ~missing(tot6) + ~missing(tot7) + ~missing(tot8) + ~missing(tot9))

g passing_bhsec = result == "PASS CERTIFICATE AWARDED"
lab var passing_bhsec "1 = Pass Certificate Awarded"

rename cidnumber citizenid
replace citizenid = "" if length(citizenid)~=11
replace citizenid = "" if citizenid == "UNDER PROCE"

rename contactnumber phone 

g sex = 1 if gender=="M"
replace sex = 0 if gender=="F"

g name = studentname
replace name = trim(itrim(lower(name)))
replace name = subinstr(name," ","",.)  
replace name = subinstr(name,",","",.)   //Removes comma (,)
replace name = subinstr(name,"'","",.)   //Removes apostrophe (')
replace name = subinstr(name,".","",.)   //Removes dot (.) 
replace name = subinstr(name,"/","",.)   //Removes slash (/)
replace name = subinstr(name,"-","",.)   //Removes dash (-)
replace name = subinstr(name,"=","",.)   //Removes dash (-)
replace name = subinstr(name,"(","",.)   //Removes opening parentheses
replace name = subinstr(name,")","",.)   //Removes closing parentheses

replace schoolname = trim(itrim(lower(schoolname)))
replace dateofbirth = subinstr(dateofbirth,"-","",.)

g day = substr(dateofbirth, 7, 2)
g month = substr(dateofbirth, 5, 2)
g year = substr(dateofbirth, 1, 4)

g birthday = day + month + year

rename stream astream
g stream=1 if astream=="ARTS"
replace stream=2 if astream=="COMMERCE"
replace stream=3 if astream=="SCIENCE"

// some schools with variant names in the score database compared to ours
// make sure to correct, either way to match
replace schoolname = "bajo higher secondary school" if schoolname == "bajothang higher secondary school"
replace schoolname = "baylling central school" if schoolname == "bayling central school"
replace schoolname = "dechencholing higher secondary school" if schoolname == "dechhenchoeling higher secondary school"
replace schoolname = "desi higher secondary school" if schoolname == "desi high school"
replace schoolname = "pakshika central school" if schoolname == "pakshikha central school"
replace schoolname = "pelkhil higher secondary school" if schoolname == "pelkhil school"

keep schoolname name phone stream sex citizenid score_* passing_bhsec indexnumber dateofbirth birthday
foreach i in schoolname name phone sex stream citizenid dateofbirth birthday {
    rename `i' `i'_s
}

save $temp/score12.dta, replace 

// Step 1: matched citizenid + fuzzy name 
gl criteria1 citizenid  

* open the score data 
use $temp/score12.dta, clear 
rename citizenid_s citizenid 

duplicates tag $criteria1, g(temp)

* obtain the subset of unique citizenid
preserve 
keep if temp==0
drop temp
save $temp/score12_step1u, replace 
restore 

* keep the duplicated for next round match
keep if temp~=0
drop temp
cap rename citizenid citizenid_s
save $temp/score12_step1d, replace

* open the survey data 
use "$temp/TVET_analysis.dta", clear
recode sex (2=0)
replace name = subinstr(name," ","",.) // remove space 
cap drop dateofbirth 

g double dateofbirth = b_year*10^4 + b_month_b*10^2 + b_day
format dateofbirth %15.0f
tostring dateofbirth, replace 
lab var dateofbirth "date of birth YYYYMMDD"

g day = substr(dateofbirth, 7, 2)
g month = substr(dateofbirth, 5, 2)
g year = substr(dateofbirth, 1, 4)

g birthday = day + month + year

generate str cid_string = citizenid
replace citizenid = ""
compress citizenid
replace citizenid = cid_string
drop cid_string
describe citizenid

save $temp/data12, replace 

use $temp/data12, clear 

duplicates tag $criteria1, g(temp)

* obtain the subset of unique citizenid
preserve 
keep if temp==0
drop temp
save $temp/data12_step1u, replace 
restore 

* keep the duplicated for next round match
keep if temp~=0
drop temp
save $temp/data12_step1d, replace

* merging two unique data together using fully matched CID and fuzzy name
use $temp/score12_step1u, clear
merge 1:1 $criteria1 using $temp/data12_step1u
matchit name_s name, g(namescore)

br name* if namescore<0.7 & _merge==3
* correct if names are not matched, leave for the next round of matching
replace _merge = . if inlist(citizenid, "10807002633", "11309000812", "11402000288", "11801002366")

* keep matched data 
preserve 
keep if _merge==3 
keep indexnumber uniqueid 
save $temp/score12_step1m, replace 
restore 

* keep unmatched data from score database, merge with the duplicated from previous step
preserve 
keep if _merge==1 | _merge==.
keep indexnumber 
merge 1:1 indexnumber using $temp/score12.dta, keep(matched) nogen 
append using $temp/score12_step1d
save $temp/score12_step2b, replace 
restore 

preserve 
keep if _merge==2 | _merge==.
keep responseid_b schoolname_b 
merge 1:1 responseid_b schoolname_b using $temp/data12.dta, keep(matched) nogen 
append using $temp/data12_step1d
save $temp/data12_step2b, replace 
restore 

// Step 2: matching schoolname + name + sex + fuzzy date of birth 
gl criteria2 schoolname_b name sex 

* open the score dataset 
use $temp/score12_step2b, clear 
cap rename name_s name 
cap rename schoolname_s schoolname_b 
cap rename sex_s sex 

duplicates tag $criteria2, g(temp)


* obtain the subset of unique citizenid
preserve 
keep if temp==0
drop temp
save $temp/score12_step2u, replace 
restore 

* keep the duplicated for next round match
keep if temp~=0
drop temp
cap rename name name_s 
cap rename schoolname_b schoolname_s 
cap rename sex sex_s 
save $temp/score12_step2d, replace

* open the survey data 
use $temp/data12_step2b.dta, clear
replace name = subinstr(name," ","",.) // remove space 

duplicates tag $criteria2, g(temp)

* obtain the subset of unique citizenid
preserve 
keep if temp==0
drop temp
save $temp/data12_step2u, replace 
restore 

* keep the duplicated for next round match
keep if temp~=0
drop temp
save $temp/data12_step2d, replace

* merging two unique data together using fully matched CID and fuzzy name
clear
use $temp/score12_step2u
merge 1:1 $criteria2 using $temp/data12_step2u
matchit dateofbirth_s dateofbirth, g(birthscore)
matchit birthday_s birthday, g(birthdscore)

replace _merge = . if _merge==3 & ~(birthscore>0.70 | birthdscore>0.70) // allow 1-digit/unit mismatch in date of birth 

* keep matched data 
preserve 
keep if _merge==3 
keep indexnumber uniqueid 
save $temp/score12_step2m, replace 
restore 

* keep unmatched data from score database, merge with the duplicated from previous step
preserve 
keep if _merge==1 | _merge==.
keep indexnumber 
merge 1:1 indexnumber using $temp/score12.dta, keep(matched) nogen 
append using $temp/score12_step2d
save $temp/score12_step3b, replace 
restore 

preserve 
keep if _merge==2 | _merge==.
keep responseid_b schoolname_b 
merge 1:1 responseid_b schoolname_b using $temp/data12.dta, keep(matched) nogen 
append using $temp/data12_step2d
save $temp/data12_step3b, replace 
restore 

// Step 3: schoolname + sex + birth date + fuzzy name 
gl criteria3 schoolname_b sex dateofbirth 

use $temp/score12_step3b, clear
rename schoolname_s schoolname_b 
rename sex_s sex 
rename dateofbirth_s dateofbirth 

duplicates tag $criteria3, g(temp)

preserve 
keep if temp==0
drop temp
save $temp/score12_step3u, replace 
restore 

keep if temp~=0 
drop temp
cap { 
rename schoolname_b schoolname_s  
rename sex sex_s  
rename dateofbirth dateofbirth_s 
} 
save $temp/score12_step3d, replace 


use $temp/data12_step3b, clear
duplicates tag $criteria3, g(temp)

preserve 
keep if temp==0
drop temp
save $temp/data12_step3u, replace 
restore 

keep if temp~=0 
drop temp 
save $temp/data12_step3d, replace

use $temp/score12_step3u, clear 
merge 1:1 $criteria3 using $temp/data12_step3u
matchit name name_s, g(namescore)

* manual check: 
replace _merge=. if inlist(citizenid_s, "10903000461")

preserve 
keep if _merge==3 
keep uniqueid indexnumber 
save $temp/score12_step3m, replace 
restore 

* keep unmatched data from score database, merge with the duplicated from previous step
preserve 
keep if _merge==1 | _merge==.
keep indexnumber 
merge 1:1 indexnumber using $temp/score12.dta, keep(matched) nogen 
append using $temp/score12_step3d
save $temp/score12_step4b, replace 
restore 

preserve 
keep if _merge==2 | _merge==.
keep responseid_b schoolname_b 
merge 1:1 responseid_b schoolname_b using $temp/data12.dta, keep(matched) nogen 
append using $temp/data12_step3d
save $temp/data12_step4b, replace 
restore 


// Step 4: sex + name + fuzzy phone (in case students transfered)
gl criteria4 sex name  

use $temp/score12_step4b, clear
rename sex_s sex 
rename name_s name 

duplicates tag $criteria4, g(temp)

preserve 
keep if temp==0
drop temp
save $temp/score12_step4u, replace 
restore 

keep if temp~=0 
drop temp
cap {  
rename sex sex_s  
rename name name_s
} 
save $temp/score12_step4d, replace 


use $temp/data12_step4b, clear
duplicates tag $criteria4, g(temp)

preserve 
keep if temp==0
drop temp
save $temp/data12_step4u, replace 
restore 

keep if temp~=0 
drop temp 
save $temp/data12_step4d, replace

use $temp/score12_step4u, clear 
merge 1:1 $criteria4 using $temp/data12_step4u
matchit phone phone_s, g(phonescore)

replace _merge=. if phonescore<0.9 & _merge==3 

* keep 
preserve 
keep if _merge==3 
keep uniqueid indexnumber
save $temp/score12_step4m, replace 
restore 

* keep unmatched data from score database, merge with the duplicated from previous step
preserve 
keep if _merge==1 | _merge==.
keep indexnumber 
merge 1:1 indexnumber using $temp/score12.dta, keep(matched) nogen 
append using $temp/score12_step4d
save $temp/score12_step5b, replace 
restore 

preserve 
keep if _merge==2 | _merge==.
keep responseid_b schoolname_b 
merge 1:1 responseid_b schoolname_b using $temp/data12.dta, keep(matched) nogen 
append using $temp/data12_step4d
save $temp/data12_step5b, replace 
restore 

// Step 5: sex + phone + fuzzy name 
gl criteria5 sex phone  

use $temp/score12_step5b, clear
rename sex_s sex 
rename phone_s phone 

duplicates tag $criteria5, g(temp)

preserve 
keep if temp==0
drop temp
save $temp/score12_step5u, replace 
restore 

keep if temp~=0 
drop temp
cap {  
rename sex sex_s  
rename phone phone_s
} 
save $temp/score12_step5d, replace 


use $temp/data12_step5b, clear
duplicates tag $criteria5, g(temp)
tab temp 

preserve 
keep if temp==0
drop temp
save $temp/data12_step5u, replace 
restore 

keep if temp~=0 
drop temp 
save $temp/data12_step5d, replace

use $temp/score12_step5u, clear 
merge 1:1 $criteria5 using $temp/data12_step5u
matchit name name_s, g(namescore)
matchit dateofbirth dateofbirth_s, g(birthscore)

br name* namescore if _merge==3
* accept all

* keep 
preserve 
keep if _merge==3 
keep uniqueid indexnumber
save $temp/score12_step5m, replace 
restore 

* keep unmatched data from score database, merge with the duplicated from previous step
preserve 
keep if _merge==1 | _merge==.
keep indexnumber 
merge 1:1 indexnumber using $temp/score12.dta, keep(matched) nogen 
append using $temp/score12_step5d
save $temp/score12_step6b, replace 
restore 

preserve 
keep if _merge==2 | _merge==.
keep responseid_b schoolname_b 
merge 1:1 responseid_b schoolname_b using $temp/data12.dta, keep(matched) nogen 
append using $temp/data12_step5d
save $temp/data12_step6b, replace 
restore 

// Step 6: sex + birthday + stream + fuzzy name? 
gl criteria6 sex dateofbirth stream 

use $temp/score12_step6b, clear
rename sex_s sex 
rename stream_s stream
rename dateofbirth_s dateofbirth

duplicates tag $criteria6, g(temp)
tab temp

preserve 
keep if temp==0
drop temp
save $temp/score12_step6u, replace 
restore 

keep if temp~=0 
drop temp
cap {  
rename sex sex_s  
rename stream stream_s 
rename dateofbirth dateofbirth_s 
} 
save $temp/score12_step6d, replace 


use $temp/data12_step6b, clear
duplicates tag $criteria6, g(temp)
tab temp 

preserve 
keep if temp==0
drop temp
save $temp/data12_step6u, replace 
restore 

keep if temp~=0 
drop temp 
save $temp/data12_step6d, replace

use $temp/score12_step6u, clear 
merge 1:1 $criteria6 using $temp/data12_step6u
matchit name name_s, g(namescore)

br name* namescore if _merge==3
replace _merge = . if _merge==3 & namescore<0.9

* keep 
preserve 
keep if _merge==3 
keep uniqueid indexnumber
save $temp/score12_step6m, replace 
restore 

* keep unmatched data from score database, merge with the duplicated from previous step
preserve 
keep if _merge==1 | _merge==.
keep indexnumber 
merge 1:1 indexnumber using $temp/score12.dta, keep(matched) nogen 
append using $temp/score12_step6d
save $temp/score12_step7b, replace 
restore 

preserve 
keep if _merge==2 | _merge==.
keep responseid_b schoolname_b 
merge 1:1 responseid_b schoolname_b using $temp/data12.dta, keep(matched) nogen 
append using $temp/data12_step6d
save $temp/data12_step7b, replace 
restore 

* create a new anonymous score dataset to match with analysis dataset 
use $temp/score12_step1m, clear
g note = "criteria: perfect cid + fuzzy name"
append using $temp/score12_step2m
replace note = "criteria: school + name + sex + fuzzy birthday" if missing(note)
append using $temp/score12_step3m
replace note = "criteria: school + sex + birthday + fuzzy name" if missing(note)
append using $temp/score12_step4m
replace note = "criteria: sex + name + fuzzy phone" if missing(note)
append using $temp/score12_step5m
replace note = "criteria: sex + phone + fuzzy name" if missing(note)
append using $temp/score12_step6m
replace note = "criteria: sex + birthday + stream + fuzzy name" if missing(note)
count // 5420

merge 1:1 indexnumber using $temp/score12.dta, keep(matched) nogen 
drop *_s  

save $clean/exam_score12.dta, replace 

* clear all temporary datasets 
global tempfilelist: dir "$temp/" files "score12_*.dta"
	foreach tfile of global tempfilelist {
	erase "$temp/`tfile'"
}

global tempfilelist: dir "$temp/" files "data12_*.dta"
	foreach tfile of global tempfilelist {
	erase "$temp/`tfile'"
}


 
